In [4]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go

S&P 500 stock data DataSet¶

Historical stock data for all current S&P 500 companies

Source: https://www.kaggle.com/datasets/camnugent/sandp500?datasetId=1908&sortBy=voteCount

In [5]:
#get dataset from file.

df = pd.read_csv('all_stocks_5yr.csv')
df.head()
Out[5]:
date open high low close volume Name
0 2013-02-08 15.07 15.12 14.63 14.75 8407500 AAL
1 2013-02-11 14.89 15.01 14.26 14.46 8882000 AAL
2 2013-02-12 14.45 14.51 14.10 14.27 8126000 AAL
3 2013-02-13 14.30 14.94 14.25 14.66 10259500 AAL
4 2013-02-14 14.94 14.96 13.16 13.99 31879900 AAL
In [6]:
#check data have null or not

df.isna()
Out[6]:
date open high low close volume Name
0 False False False False False False False
1 False False False False False False False
2 False False False False False False False
3 False False False False False False False
4 False False False False False False False
... ... ... ... ... ... ... ...
619035 False False False False False False False
619036 False False False False False False False
619037 False False False False False False False
619038 False False False False False False False
619039 False False False False False False False

619040 rows × 7 columns

In [7]:
#get data of AAPL stock only
df_aapl = df.loc[df['Name'] == 'AAPL']
df_aapl.head()
Out[7]:
date open high low close volume Name
1259 2013-02-08 67.7142 68.4014 66.8928 67.8542 158168416 AAPL
1260 2013-02-11 68.0714 69.2771 67.6071 68.5614 129029425 AAPL
1261 2013-02-12 68.5014 68.9114 66.8205 66.8428 151829363 AAPL
1262 2013-02-13 66.7442 67.6628 66.1742 66.7156 118721995 AAPL
1263 2013-02-14 66.3599 67.3771 66.2885 66.6556 88809154 AAPL
In [8]:
df_aapl.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1259 entries, 1259 to 2517
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1259 non-null   object 
 1   open    1259 non-null   float64
 2   high    1259 non-null   float64
 3   low     1259 non-null   float64
 4   close   1259 non-null   float64
 5   volume  1259 non-null   int64  
 6   Name    1259 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 78.7+ KB
In [9]:
#change type data of date column to datetime.
df_aapl.loc[:,'date'] = pd.to_datetime(df_aapl.loc[:,'date'],format="%Y/%m/%d")
df_aapl.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1259 entries, 1259 to 2517
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1259 non-null   datetime64[ns]
 1   open    1259 non-null   float64       
 2   high    1259 non-null   float64       
 3   low     1259 non-null   float64       
 4   close   1259 non-null   float64       
 5   volume  1259 non-null   int64         
 6   Name    1259 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 78.7+ KB
/var/folders/6b/grn5vmsd7rs9lmn4336vbb3h0000gn/T/ipykernel_29984/3505223316.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aapl.loc[:,'date'] = pd.to_datetime(df_aapl.loc[:,'date'],format="%Y/%m/%d")
In [10]:
#AAPL Open and Close, High and Low, Volume Price in Historical(2013-1018)

fig, (ax1,ax2,ax3)= plt.subplots(3, 1, figsize=(8,8), constrained_layout = True)

ax1.plot(df_aapl["date"],df_aapl["open"],color="green", label="Open Price")
ax1.plot(df_aapl["date"], df_aapl["close"],color="red",label = "Close Price")
ax1.set_xlabel("Year")
ax1.set_ylabel("Stock Price")
ax1.legend()
ax1.set_title("AAPL Open and Close Price in Historical 2013-2018", loc = "right" , weight='bold')

ax2.plot(df_aapl["date"],df_aapl["high"],color="green", label="High Price")
ax2.plot(df_aapl["date"], df_aapl["low"],color="red",label = "Low Price")
ax2.set_xlabel("Year")
ax2.set_ylabel("High-Low Price of AALP")
ax2.legend()
ax2.set_title("AAPL High-Low Price in Historical 2013-2018",loc = "right" , weight='bold')

ax3.plot(df_aapl["date"],df_aapl["volume"], label="Volume")
ax3.set_xlabel("Year")
ax3.set_ylabel("Volumn of Stock")
ax3.legend()
ax3.set_title("Volume in Historical 2013-2018", loc = "right" , weight='bold')

plt.show()
In [ ]:
#Trading Volume of AAPL in historical 2013-2018

fig = go.Figure(data=go.Ohlc(x=df_aapl['date'],open=df_aapl['open'],high=df_aapl['high'],low=df_aapl['low']
                             ,close=df_aapl['close'],increasing=dict(line=dict(color= '#58FA58'))
                             ,decreasing=dict(line=dict(color= '#FA5858'))))
fig.update_layout(
title = 'Apple Historical Price (USD$)',
yaxis_title = 'AAPL stock')

fig.show()